In [1]:
%matplotlib inline
import pandas as pd
from tools import ready_data
# CSV result files (one per sketch) passed to `ready_data` in later cells, which
# presumably reads each file and reorders its data by voter (voter1, voter2, ...)
# -- TODO confirm against tools.ready_data.
file1 = 'results-3AFT28WXLF2Y43L0GI7MY3EYIU1IO5.csv'
file2 = 'results-3P4ZBJFX2V3MZLSC6WSBC0ES548FWN.csv'
In [2]:
## Correlation p-values for Pearson's r
from scipy.stats import pearsonr

## Adapted from: https://stackoverflow.com/questions/25571882/pandas-columns-correlation-with-statistical-significance/45507587#45507587
def corr_pvalues(df):
    """Return the matrix of p-values for the pairwise Pearson correlations of df's columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose columns are compared pairwise; each pair of columns is
        passed to ``scipy.stats.pearsonr``, so every column must be numeric.

    Returns
    -------
    pandas.DataFrame
        Square float frame whose row and column labels are ``df.columns``.
        The entry at row ``c``, column ``r`` is the p-value returned by
        ``pearsonr(df[r], df[c])`` (NaN when pearsonr reports NaN, e.g. for
        a constant column).
    """
    # df = df.dropna()._get_numeric_data()
    labels = df.columns
    # Pre-allocate a float frame (NaN-filled) instead of an empty object-dtype
    # frame, so the result is numeric and can be compared/formatted directly.
    pvalues = pd.DataFrame(index=labels, columns=list(labels), dtype=float)
    for r in labels:
        for c in labels:
            # Write through a single .loc call: the chained form
            # `pvalues[r][c] = ...` assigns into a temporary object under
            # pandas Copy-on-Write and the value is silently lost.
            pvalues.loc[c, r] = pearsonr(df[r], df[c])[1]
    return pvalues

Boxplots of Likert scores

Art_freeform_AP_03

In [3]:
# Load the results for the first sketch; df2 and name1 are the parts used below.
df1, df2, name1, header1 = ready_data(file1)
print("The boxplot of sketch %s" % name1)

# The trailing 7 rows hold the computed metrics, which do not belong in a
# boxplot of votes: remove them, then order the columns by label.
df_2_refine = df2.drop(index=df2.tail(7).index).sort_index(axis=1)
boxplot1 = df_2_refine.boxplot(showmeans=True, rot=90, grid=False)
boxplot1.figure.savefig('Art_freeform_AP_03-boxplot.pdf')
boxplot1
The boxplot of sketch Art_freeform_AP_03
Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x123d5abe0>
In [4]:
# Collapse the voter labels on a copy of df2: every individual rater
# ("voter_<n>") becomes 'human', and 'voter_chamfer' is shortened to 'chamfer'.
df_corr = df2.copy()
df_corr.loc[df_corr['voter'].str.match('voter_[0-9]+'), 'voter'] = 'human'
df_corr.loc[df_corr['voter'].eq('voter_chamfer'), 'voter'] = 'chamfer'
df_corr
Out[4]:
voter ALG: PolyVector2StrokeAggregator GT: Branislav Mirkovic GT: Ge Jin GT: Santiago Rial ALG: PolyVector ALG: RealTimeInking ALG: TopologyDriven2StrokeAggregator ALG: TopologyDriven ALG: FidelitySimplicity ALG: StrokeAggregator ALG: DelaunayTriangulation ALG: MasteringSketching
0 human 2.000000 5.0 4.0 5.0 2.000000 2.000000 2.000000 1.000000 2.000000 4.000000 1.000000 3.000000
1 human 4.000000 3.0 4.0 1.0 3.000000 4.000000 4.000000 4.000000 4.000000 4.000000 2.000000 2.000000
2 human 2.000000 1.0 3.0 2.0 2.000000 2.000000 1.000000 3.000000 1.000000 2.000000 2.000000 4.000000
3 human 2.000000 4.0 3.0 3.0 2.000000 4.000000 3.000000 3.000000 2.000000 3.000000 2.000000 3.000000
4 human 2.000000 2.0 3.0 3.0 4.000000 3.000000 1.000000 4.000000 1.000000 2.000000 2.000000 5.000000
5 human 2.000000 3.0 3.0 3.0 2.000000 4.000000 3.000000 4.000000 2.000000 3.000000 2.000000 4.000000
6 human 3.000000 2.0 3.0 2.0 3.000000 3.000000 1.000000 2.000000 1.000000 3.000000 2.000000 4.000000
7 human 3.000000 3.0 3.0 3.0 4.000000 3.000000 3.000000 3.000000 3.000000 2.000000 3.000000 3.000000
8 human 4.000000 3.0 2.0 4.0 3.000000 2.000000 4.000000 3.000000 3.000000 3.000000 3.000000 5.000000
9 human 3.000000 3.0 2.0 3.0 3.000000 2.000000 1.000000 2.000000 2.000000 3.000000 1.000000 4.000000
10 human 1.000000 5.0 5.0 4.0 2.000000 3.000000 1.000000 1.000000 2.000000 2.000000 1.000000 1.000000
11 human 3.000000 2.0 2.0 3.0 4.000000 3.000000 1.000000 3.000000 3.000000 2.000000 3.000000 4.000000
12 human 4.000000 2.0 2.0 3.0 4.000000 4.000000 1.000000 3.000000 1.000000 3.000000 2.000000 5.000000
13 human 3.000000 2.0 2.0 2.0 3.000000 3.000000 1.000000 3.000000 3.000000 3.000000 2.000000 4.000000
14 human 4.000000 2.0 2.0 3.0 3.000000 3.000000 2.000000 4.000000 1.000000 2.000000 2.000000 2.000000
15 human 3.000000 2.0 2.0 2.0 4.000000 3.000000 1.000000 2.000000 4.000000 3.000000 2.000000 4.000000
16 human 1.000000 4.0 3.0 3.0 1.000000 2.000000 2.000000 1.000000 2.000000 2.000000 1.000000 3.000000
17 human 2.000000 2.0 2.0 3.0 3.000000 2.000000 2.000000 4.000000 3.000000 4.000000 3.000000 5.000000
18 human 3.000000 2.0 3.0 2.0 3.000000 2.000000 1.000000 4.000000 2.000000 2.000000 1.000000 5.000000
19 human 2.000000 3.0 4.0 2.0 3.000000 2.000000 2.000000 3.000000 4.000000 2.000000 3.000000 5.000000
20 chamfer 0.001637 0.0 0.0 0.0 0.001457 0.001558 0.001875 0.001549 0.002359 0.001904 0.002245 0.001655
21 hausdorff 0.028434 0.0 0.0 0.0 0.031331 0.032005 0.029833 0.032679 0.030017 0.031212 0.028636 0.207692
22 f1_score_6 0.936035 1.0 1.0 1.0 0.944784 0.945530 0.942294 0.936695 0.905982 0.923612 0.915595 0.948062
23 f1_score_16 0.994049 1.0 1.0 1.0 0.994178 0.989077 0.994987 0.994032 0.985935 0.991529 0.980132 0.994761
24 f1_score_26 0.999622 1.0 1.0 1.0 0.999587 0.997840 0.999490 0.999388 0.997765 0.999036 0.998954 0.999317
25 f1_score_36 1.000000 1.0 1.0 1.0 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 0.999938
26 IOU 0.181755 1.0 1.0 1.0 0.201805 0.183727 0.210625 0.194510 0.187392 0.142973 0.186873 0.360232

Correlation of mean Likert scores to computed metrics

In [5]:
# Average all rows that share a 'voter' label (the 'human' rows collapse to their
# mean), then transpose so each voter label is a column and each method a row.
df_corr_agg = df_corr.groupby('voter').mean().T
In [6]:
# Pairwise correlation between the aggregated columns (DataFrame.corr computes Pearson's r by default).
df_corr_agg.corr()
Out[6]:
voter IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 1.000000 -0.948773 0.712306 0.614617 0.038102 0.938849 -0.282519 0.234198
chamfer -0.948773 1.000000 -0.823923 -0.681241 -0.110374 -0.984524 0.335403 -0.327931
f1_score_16 0.712306 -0.823923 1.000000 0.752086 -0.081329 0.842332 -0.090452 0.443812
f1_score_26 0.614617 -0.681241 0.752086 1.000000 -0.027827 0.691472 -0.125825 0.202899
f1_score_36 0.038102 -0.110374 -0.081329 -0.027827 1.000000 0.017500 -0.969025 -0.693383
f1_score_6 0.938849 -0.984524 0.842332 0.691472 0.017500 1.000000 -0.243494 0.334571
hausdorff -0.282519 0.335403 -0.090452 -0.125825 -0.969025 -0.243494 1.000000 0.615132
human 0.234198 -0.327931 0.443812 0.202899 -0.693383 0.334571 0.615132 1.000000
In [7]:
# Single-pair check, kept for reference:
# print( "Correlation coefficient, p value" )
# print( pearsonr( df_corr_agg.chamfer, df_corr_agg.human ) )
# p-values of the Pearson correlation for every pair of aggregated columns.
corr_pvalues( df_corr_agg )
Out[7]:
IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 0 2.54866e-06 0.00934311 0.0334524 0.906414 6.07424e-06 0.373613 0.463776
chamfer 2.54866e-06 0 0.000983727 0.014716 0.73274 6.81159e-09 0.286522 0.298059
f1_score_16 0.00934311 0.000983727 0 0.00477916 0.801608 0.000585191 0.779813 0.148382
f1_score_26 0.0334524 0.014716 0.00477916 1.32832e-79 0.931591 0.0127449 0.696798 0.527091
f1_score_36 0.906414 0.73274 0.801608 0.931591 0 0.95695 2.13186e-07 0.0123997
f1_score_6 6.07424e-06 6.81159e-09 0.000585191 0.0127449 0.95695 0 0.44569 0.287793
hausdorff 0.373613 0.286522 0.779813 0.696798 2.13186e-07 0.44569 0 0.0332628
human 0.463776 0.298059 0.148382 0.527091 0.0123997 0.287793 0.0332628 0

Correlation without MasteringSketching (its Likert scores and metrics are both excluded)

In [8]:
# Per-voter-label means with the "ALG: MasteringSketching" column dropped before
# transposing, so that method is excluded from every column (human mean and metrics).
df_corr_agg = df_corr.groupby('voter').mean().drop(columns = ["ALG: MasteringSketching"]).T

Correlation is not defined (NaN) for constant data. The F-score with a very high threshold (36/1000) finds perfect similarity (a constant 1) in all cases except MasteringSketching. With MasteringSketching excluded, the f1_score_36 values are constant, so their correlation with any other column is no longer defined.

In [9]:
# Pairwise correlation (Pearson's r, the DataFrame.corr default) with MasteringSketching excluded.
df_corr_agg.corr()
Out[9]:
voter IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 1.000000 -0.951065 0.718304 0.616363 NaN 0.939007 -0.995194 0.361945
chamfer -0.951065 1.000000 -0.840805 -0.688786 NaN -0.988785 0.930712 -0.564760
f1_score_16 0.718304 -0.840805 1.000000 0.752607 NaN 0.846689 -0.687655 0.539445
f1_score_26 0.616363 -0.688786 0.752607 1.000000 NaN 0.692333 -0.618913 0.254903
f1_score_36 NaN NaN NaN NaN NaN NaN NaN NaN
f1_score_6 0.939007 -0.988785 0.846689 0.692333 NaN 1.000000 -0.917426 0.481229
hausdorff -0.995194 0.930712 -0.687655 -0.618913 NaN -0.917426 1.000000 -0.319034
human 0.361945 -0.564760 0.539445 0.254903 NaN 0.481229 -0.319034 1.000000
In [10]:
# p-values for every pair of columns; f1_score_36 is constant here, so pearsonr
# warns about constant input and that row/column comes out as NaN.
corr_pvalues( df_corr_agg )
scipy/stats/stats.py:3845: PearsonRConstantInputWarning: An input array is constant; the correlation coefficent is not defined.
  warnings.warn(PearsonRConstantInputWarning())
Out[10]:
IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 9.37045e-72 6.91846e-06 0.0127771 0.0434357 NaN 1.83144e-05 2.1504e-10 0.274041
chamfer 6.91846e-06 0 0.00118592 0.0190803 NaN 9.65094e-09 3.21154e-05 0.0702674
f1_score_16 0.0127771 0.00118592 0 0.00752149 NaN 0.00101002 0.0193589 0.0867828
f1_score_26 0.0434357 0.0190803 0.00752149 9.37045e-72 NaN 0.0182251 0.0423308 0.449379
f1_score_36 NaN NaN NaN NaN NaN NaN NaN NaN
f1_score_6 1.83144e-05 9.65094e-09 0.00101002 0.0182251 NaN 0 6.93504e-05 0.133979
hausdorff 2.1504e-10 3.21154e-05 0.0193589 0.0423308 NaN 6.93504e-05 9.37045e-72 0.338933
human 0.274041 0.0702674 0.0867828 0.449379 NaN 0.133979 0.338933 0

Ind_architecture_TU_02

In [11]:
# Load the results for the second sketch; df4 and name2 are the parts used below.
df3, df4, name2, header2 = ready_data(file2)
print("The boxplot of sketch %s" % name2)

# The trailing 7 rows hold the computed metrics, which do not belong in a
# boxplot of votes: remove them, then order the columns by label.
df_4_refine = df4.drop(index=df4.tail(7).index).sort_index(axis=1)
boxplot2 = df_4_refine.boxplot(showmeans=True, rot=90, grid=False)
boxplot2.figure.savefig('Ind_architecture_TU_02-boxplot.pdf')
boxplot2
The boxplot of sketch Ind_architecture_TU_02
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x122f69ca0>
In [12]:
# Collapse the voter labels on a copy of df4: every individual rater
# ("voter_<n>") becomes 'human', and 'voter_chamfer' is shortened to 'chamfer'.
df_corr = df4.copy()
df_corr.loc[df_corr['voter'].str.match('voter_[0-9]+'), 'voter'] = 'human'
df_corr.loc[df_corr['voter'].eq('voter_chamfer'), 'voter'] = 'chamfer'
df_corr
Out[12]:
voter ALG: DelaunayTriangulation GT: Branislav Mirkovic GT: Ge Jin GT: Santiago Rial ALG: FidelitySimplicity ALG: MasteringSketching ALG: PolyVector2StrokeAggregator ALG: PolyVector ALG: RealTimeInking ALG: StrokeAggregator ALG: TopologyDriven2StrokeAggregator ALG: TopologyDriven
0 human 1.000000 4.0 4.0 3.0 3.000000 3.000000 1.000000 3.000000 3.000000 4.000000 2.000000 4.000000
1 human 1.000000 3.0 4.0 3.0 2.000000 4.000000 1.000000 3.000000 3.000000 3.000000 1.000000 3.000000
2 human 1.000000 4.0 4.0 4.0 2.000000 3.000000 1.000000 3.000000 3.000000 3.000000 2.000000 3.000000
3 human 5.000000 2.0 5.0 2.0 4.000000 5.000000 4.000000 4.000000 4.000000 2.000000 3.000000 4.000000
4 human 1.000000 3.0 5.0 4.0 1.000000 2.000000 1.000000 2.000000 2.000000 3.000000 1.000000 2.000000
5 human 1.000000 3.0 4.0 4.0 3.000000 5.000000 2.000000 3.000000 3.000000 3.000000 1.000000 2.000000
6 human 1.000000 4.0 1.0 2.0 5.000000 3.000000 1.000000 3.000000 2.000000 3.000000 1.000000 3.000000
7 human 1.000000 3.0 5.0 4.0 3.000000 2.000000 1.000000 2.000000 2.000000 2.000000 1.000000 2.000000
8 human 1.000000 3.0 5.0 3.0 3.000000 2.000000 1.000000 4.000000 4.000000 3.000000 4.000000 4.000000
9 human 3.000000 4.0 4.0 2.0 3.000000 5.000000 1.000000 4.000000 3.000000 3.000000 2.000000 4.000000
10 human 1.000000 4.0 5.0 3.0 2.000000 5.000000 4.000000 2.000000 4.000000 3.000000 5.000000 2.000000
11 human 3.000000 5.0 4.0 4.0 4.000000 5.000000 5.000000 3.000000 4.000000 3.000000 2.000000 3.000000
12 human 1.000000 5.0 5.0 4.0 2.000000 3.000000 1.000000 3.000000 3.000000 2.000000 2.000000 3.000000
13 human 1.000000 5.0 4.0 4.0 3.000000 5.000000 2.000000 3.000000 4.000000 4.000000 2.000000 4.000000
14 human 1.000000 2.0 3.0 2.0 3.000000 5.000000 2.000000 4.000000 3.000000 4.000000 3.000000 4.000000
15 human 1.000000 3.0 2.0 1.0 3.000000 4.000000 2.000000 1.000000 2.000000 3.000000 2.000000 4.000000
16 human 1.000000 5.0 4.0 5.0 2.000000 4.000000 1.000000 4.000000 3.000000 3.000000 2.000000 4.000000
17 human 2.000000 3.0 4.0 3.0 2.000000 3.000000 1.000000 3.000000 3.000000 2.000000 1.000000 3.000000
18 human 1.000000 2.0 3.0 3.0 1.000000 3.000000 1.000000 2.000000 4.000000 2.000000 1.000000 2.000000
19 human 1.000000 5.0 5.0 5.0 2.000000 4.000000 1.000000 3.000000 3.000000 3.000000 2.000000 3.000000
20 chamfer 0.005779 0.0 0.0 0.0 0.002199 0.001895 0.007552 0.002049 0.002286 0.001997 0.003086 0.002060
21 hausdorff 0.062610 0.0 0.0 0.0 0.031401 0.445286 0.087727 0.030806 0.034132 0.030416 0.032802 0.030529
22 f1_score_6 0.660439 1.0 1.0 1.0 0.934647 0.941383 0.672101 0.933972 0.919399 0.943903 0.882557 0.934014
23 f1_score_16 0.920560 1.0 1.0 1.0 0.995793 0.996916 0.838325 0.996824 0.991350 0.996142 0.989999 0.996826
24 f1_score_26 0.967280 1.0 1.0 1.0 0.999677 0.999620 0.914185 0.999625 0.998038 0.999569 0.999152 0.999637
25 f1_score_36 0.984570 1.0 1.0 1.0 1.000000 0.999981 0.951234 1.000000 1.000000 1.000000 1.000000 1.000000
26 IOU 0.097052 1.0 1.0 1.0 0.153100 0.314451 0.134667 0.155384 0.167559 0.119429 0.205412 0.158741

Correlation of mean Likert scores to computed metrics

In [13]:
# Average all rows that share a 'voter' label (the 'human' rows collapse to their
# mean), then transpose so each voter label is a column and each method a row.
df_corr_agg = df_corr.groupby('voter').mean().T
In [14]:
# Pairwise correlation between the aggregated columns (DataFrame.corr computes Pearson's r by default).
df_corr_agg.corr()
Out[14]:
voter IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 1.000000 -0.679687 0.332705 0.287209 0.266692 0.561123 -0.208283 0.622172
chamfer -0.679687 1.000000 -0.902308 -0.866482 -0.845680 -0.967367 0.134853 -0.870024
f1_score_16 0.332705 -0.902308 1.000000 0.994668 0.986828 0.896473 -0.052373 0.712887
f1_score_26 0.287209 -0.866482 0.994668 1.000000 0.998108 0.847406 -0.052947 0.654864
f1_score_36 0.266692 -0.845680 0.986828 0.998108 1.000000 0.816480 -0.054210 0.625932
f1_score_6 0.561123 -0.967367 0.896473 0.847406 0.816480 1.000000 -0.086359 0.876085
hausdorff -0.208283 0.134853 -0.052373 -0.052947 -0.054210 -0.086359 1.000000 0.167720
human 0.622172 -0.870024 0.712887 0.654864 0.625932 0.876085 0.167720 1.000000
In [15]:
# p-values of the Pearson correlation for every pair of aggregated columns.
corr_pvalues( df_corr_agg )
Out[15]:
IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 0 0.0150339 0.290658 0.365386 0.402076 0.0576718 0.515944 0.0307471
chamfer 0.0150339 0 5.93602e-05 0.000265938 0.000528771 2.75915e-07 0.676051 0.000233932
f1_score_16 0.290658 5.93602e-05 0 3.36382e-11 3.05469e-09 7.85376e-05 0.871582 0.00925923
f1_score_26 0.365386 0.000265938 3.36382e-11 1.32832e-79 1.90511e-13 0.000501379 0.870187 0.0208273
f1_score_36 0.402076 0.000528771 3.05469e-09 1.90511e-13 0 0.00119397 0.867113 0.0294612
f1_score_6 0.0576718 2.75915e-07 7.85376e-05 0.000501379 0.00119397 0 0.789575 0.000186208
hausdorff 0.515944 0.676051 0.871582 0.870187 0.867113 0.789575 0 0.602345
human 0.0307471 0.000233932 0.00925923 0.0208273 0.0294612 0.000186208 0.602345 1.32832e-79
In [16]:
# Pearson correlation between the chamfer column and the mean human score;
# prints the (correlation coefficient, p-value) pair.
print( pearsonr( df_corr_agg.chamfer, df_corr_agg.human ) )
(-0.8700239786589308, 0.00023393190568248435)

Correlation without MasteringSketching (its Likert scores and metrics are both excluded)

In [17]:
# Per-voter-label means with the "ALG: MasteringSketching" column dropped before
# transposing, so that method is excluded from every column (human mean and metrics).
df_corr_agg = df_corr.groupby('voter').mean().drop(columns = ["ALG: MasteringSketching"]).T
In [18]:
# Pairwise correlation (Pearson's r, the DataFrame.corr default) with MasteringSketching excluded.
df_corr_agg.corr()
Out[18]:
voter IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 1.000000 -0.685918 0.342518 0.295953 0.274825 0.570597 -0.764868 0.681997
chamfer -0.685918 1.000000 -0.902927 -0.866435 -0.845269 -0.967758 0.986779 -0.902723
f1_score_16 0.342518 -0.902927 1.000000 0.994608 0.986702 0.895226 -0.865794 0.717963
f1_score_26 0.295953 -0.866435 0.994608 1.000000 0.998094 0.845466 -0.834301 0.657667
f1_score_36 0.274825 -0.845269 0.986702 0.998094 1.000000 0.814179 -0.815732 0.628119
f1_score_6 0.570597 -0.967758 0.895226 0.845466 0.814179 1.000000 -0.926732 0.898829
hausdorff -0.764868 0.986779 -0.865794 -0.834301 -0.815732 -0.926732 1.000000 -0.864294
human 0.681997 -0.902723 0.717963 0.657667 0.628119 0.898829 -0.864294 1.000000
In [19]:
# p-values of the Pearson correlation for every pair of columns, MasteringSketching excluded.
corr_pvalues( df_corr_agg )
Out[19]:
IOU chamfer f1_score_16 f1_score_26 f1_score_36 f1_score_6 hausdorff human
voter
IOU 0 0.0197922 0.302489 0.376888 0.413422 0.0667844 0.00609998 0.0207962
chamfer 0.0197922 9.37045e-72 0.000140567 0.000559544 0.00105055 1.08421e-06 2.01756e-08 0.000141856
f1_score_16 0.302489 0.000140567 2.12029e-70 3.60388e-10 2.07113e-08 0.000195927 0.000571186 0.0128398
f1_score_26 0.376888 0.000559544 3.60388e-10 0 3.36618e-12 0.00104484 0.00140607 0.0278514
f1_score_36 0.413422 0.00105055 2.07113e-08 3.36618e-12 9.37045e-72 0.00228352 0.00220415 0.0385041
f1_score_6 0.0667844 1.08421e-06 0.000195927 0.00104484 0.00228352 2.12029e-70 4.10524e-05 0.000168282
hausdorff 0.00609998 2.01756e-08 0.000571186 0.00140607 0.00220415 4.10524e-05 0 0.000599116
human 0.0207962 0.000141856 0.0128398 0.0278514 0.0385041 0.000168282 0.000599116 0

Does the rank of algorithms stay consistent?

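The Wilcoxon signed-rank test's null hypothesis is that the paired scores of the two sketches do not differ (the paired differences are symmetric about zero). A low p-value therefore means that the scores differ systematically between the two sketches; a high p-value means that no significant difference was detected. Note that the test compares the paired scores themselves, not the rank order of the algorithms.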

In [20]:
# Mean Likert score per method for Ind_architecture_TU_02: keep only the
# individual raters' rows ("voter_<n>"), discard the label column, then average.
df_ind = df4.loc[df4['voter'].str.match('voter_[0-9]+')].drop(columns='voter').mean()
df_ind
Out[20]:
ALG: DelaunayTriangulation              1.45
GT: Branislav Mirkovic                  3.60
GT: Ge Jin                              4.00
GT: Santiago Rial                       3.25
ALG: FidelitySimplicity                 2.65
ALG: MasteringSketching                 3.75
ALG: PolyVector2StrokeAggregator        1.70
ALG: PolyVector                         2.95
ALG: RealTimeInking                     3.10
ALG: StrokeAggregator                   2.90
ALG: TopologyDriven2StrokeAggregator    2.00
ALG: TopologyDriven                     3.15
dtype: float64
In [21]:
# Mean Likert score per method for Art_freeform_AP_03: keep only the individual
# raters' rows ("voter_<n>"), discard the label column, then average.
# The row mask is built from df2 itself; it was previously taken from df4, which
# selected the right rows only because both frames share the same row layout.
df_art = df2[ df2.voter.str.match('voter_[0-9]+') ].drop( columns=['voter'] ).mean()
df_art
Out[21]:
ALG: PolyVector2StrokeAggregator        2.65
GT: Branislav Mirkovic                  2.75
GT: Ge Jin                              2.85
GT: Santiago Rial                       2.80
ALG: PolyVector                         2.90
ALG: RealTimeInking                     2.80
ALG: TopologyDriven2StrokeAggregator    1.85
ALG: TopologyDriven                     2.85
ALG: FidelitySimplicity                 2.30
ALG: StrokeAggregator                   2.70
ALG: DelaunayTriangulation              2.00
ALG: MasteringSketching                 3.75
dtype: float64
In [22]:
# Put the two per-method mean Series side by side. The DataFrame constructor aligns
# them on their index labels (the method names), so the differing method order of
# the two sketches does not matter; one row per method, one column per sketch.
likert_averages = pd.DataFrame({'Art_freeform_AP_03': df_art, 'Ind_architecture_TU_02': df_ind })
likert_averages
Out[22]:
Art_freeform_AP_03 Ind_architecture_TU_02
ALG: DelaunayTriangulation 2.00 1.45
ALG: FidelitySimplicity 2.30 2.65
ALG: MasteringSketching 3.75 3.75
ALG: PolyVector 2.90 2.95
ALG: PolyVector2StrokeAggregator 2.65 1.70
ALG: RealTimeInking 2.80 3.10
ALG: StrokeAggregator 2.70 2.90
ALG: TopologyDriven 2.85 3.15
ALG: TopologyDriven2StrokeAggregator 1.85 2.00
GT: Branislav Mirkovic 2.75 3.60
GT: Ge Jin 2.85 4.00
GT: Santiago Rial 2.80 3.25
In [23]:
import scipy.stats
# Wilcoxon signed-rank test on the paired per-method mean Likert scores of the two
# sketches; returns the test statistic and the p-value.
scipy.stats.wilcoxon( likert_averages.Art_freeform_AP_03, likert_averages.Ind_architecture_TU_02 )
# Alternative call kept for reference (first sketch's scores vs. the same scores shifted by 0.01):
# scipy.stats.wilcoxon( likert_averages.Art_freeform_AP_03, likert_averages.Art_freeform_AP_03 + 0.01 )
scipy/stats/morestats.py:2958: UserWarning: Exact p-value calculation does not work if there are ties. Switching to normal approximation.
  warnings.warn("Exact p-value calculation does not work if there are "
Out[23]:
WilcoxonResult(statistic=18.0, pvalue=0.18231443298387762)